#include <stdio.h>
#include <stdlib.h>
#include "SuffixTree.h"
#include "Bits.h"
#include "ExactSearch.h"

int maxOccurences;
DividerElement* dividers;
unsigned int totalDividers;
int found;
unsigned int nextSubTree;

char * get01Encoding(char *dnatosearch, int length, char *encodedDNA)
{	
	
	
	int i,j;
	for(i=0,j=0;i<length;i++,j+=2)
	{
		char currChar=dnatosearch[i];
		switch (currChar)		
		{
			case 'a': case 'A': 
				encodedDNA[j]=0; 
				encodedDNA[j+1]=0;
				break;
			case 'c': case 'C': 
				encodedDNA[j]=0; 
				encodedDNA[j+1]=1; 
				break;
					
			case 'g': case 'G': 
				encodedDNA[j]=1; 
				encodedDNA[j+1]=0; 
				break;
					
			case 't': case 'T': 
				encodedDNA[j]=1; 
				encodedDNA[j+1]=1; 
				break;
				
			default:						
				break;
		}
	}	
	
	return 0;
}

int locateTree(DividerElement *dividers,  unsigned int bitPrefix)
{
	unsigned int i=0;
	
//	printf("Locating subtree for bit prefix %ul\n",bitPrefix);
//	printBitSequence(&bitPrefix);
	while (i<totalDividers)
	{
		DividerElement currDivider=dividers[i];
	//	printf("Divider %i is from %ul to %ul\n",i, currDivider.minBitSequence,currDivider.maxBitSequence );
	//	printBitSequence(&currDivider.minBitSequence);
	//	printBitSequence(&currDivider.maxBitSequence);
		if(currDivider.minBitSequence<=bitPrefix && bitPrefix<=currDivider.maxBitSequence) 
			return i;
		i++;
	}
	return -1;
}

unsigned int getBitPrefix(char *pattern01, int patternLength)
{
	int i;
	unsigned int ret=0;
	
	for(i=0;i<MIN(32,patternLength);i++)
	{
		if(pattern01[i]==1)
			setBit(&ret,i);
	}
	
	return ret;
}

int traverse(STNode *tree, int nodePos, STNode *currNode, char *pattern01, 
		int patternLength, int currPatternPos, int *depth)
{	
	int nextChar;
	int childIndex;
	//int nextLeafIndex;

	int nodeLength=(*currNode).length;
	
	if(nodeLength>=(patternLength-currPatternPos))
	{		
		return  nodePos;
	}

	currPatternPos+=nodeLength;
	(*depth)=currPatternPos;
	nextChar=pattern01[currPatternPos];
	childIndex=(*currNode).children[nextChar];
	if(childIndex==0)
	{
		return -1;

	}

	return traverse(tree, childIndex , &tree[childIndex] ,pattern01,patternLength, currPatternPos,depth);

}

int blindSearch(char *pattern01,int patternLength,  STNode *tree, int *depth)
{
	STNode *root=&tree[0];
	int index=(*root).children[(int)(pattern01[0])];
	if(index==0)
		return -1;
	
	return traverse(tree,index, &tree[index], pattern01,patternLength, 0, depth);
}

void findFirstLeaf(STNode *tree, int parentindex, int *fileNumber, int *startPos, 
		int *depth, int patternLength)
{
	int i;
	STNode *currParent=&tree[parentindex];

	
	for(i=0;i<2;i++)
	{
		int childIndex=(*currParent).children[i];
		if(childIndex>0)
		{
			*depth+=(*currParent).length;
			findFirstLeaf(tree,childIndex,fileNumber,startPos,depth,patternLength);
			return;
		}
	}
	
	//currParent is a leaf
	(*fileNumber)=(*currParent).fileNumber;
	(*startPos)=(*currParent).startPos-(*depth);
	return;
}

void collectAllLeaves(STNode *tree, int parentindex, int *counter, ResultRecord *occurences, 
		int patternLength, int depth )
{
	int currdepth=depth;
	short isAleaf=1;
	int i;
	STNode *parent=&tree[parentindex];	
	STNode *nextLeaf;

	
	
	if(parent->nextLeaf)
	{
		nextLeaf=&tree[parent->nextLeaf];
		if((*counter)<maxOccurences)
			{
				occurences[*counter].fileNumber=(*nextLeaf).fileNumber;
				occurences[*counter].startPos=((*nextLeaf).startPos)/2;
			}

		(*counter)++;

		while(nextLeaf->nextLeaf)
		{
			nextLeaf=&tree[nextLeaf->nextLeaf];
			if((*counter)<maxOccurences)
			{
				occurences[*counter].fileNumber=(*nextLeaf).fileNumber;
				occurences[*counter].startPos=((*nextLeaf).startPos)/2;
			}
			(*counter)++;
		}
	}

	
			
	for(i=0;i<2;i++)
	{
		int childIndex=(*parent).children[i];
		if(childIndex>0)
		{
			isAleaf=0;
			collectAllLeaves(tree, childIndex, counter, occurences, 
					patternLength, currdepth+(*parent).length );
		}
	}
	
	if(isAleaf)
	{
		if((*counter)<maxOccurences)
			{
				occurences[*counter].fileNumber=(*parent).fileNumber;
				occurences[*counter].startPos=((*parent).startPos-currdepth)/2;
			}

		(*counter)++;						
	}
}

void findLeaf(int startPos, int fileNumber, STNode *tree, int parentindex, int *counter, ResultRecord *occurences, 
		int patternLength, int depth )
{
	//printf("Looking for %d in file %d\n",startPos,fileNumber);
	int currdepth=depth;
	short isAleaf=1;
	int i;
	STNode *parent=&tree[parentindex];	
	STNode *nextLeaf;	
	
	if(parent->nextLeaf)
	{
		nextLeaf=&tree[parent->nextLeaf];
		//printf("%d %d\n",(*nextLeaf).fileNumber,((*nextLeaf).startPos)/2);
		if((*nextLeaf).fileNumber==fileNumber && ((*nextLeaf).startPos)/2==startPos)
		{
			found=1;
			return ;
		}
		

		while(nextLeaf->nextLeaf)
		{
			nextLeaf=&tree[nextLeaf->nextLeaf];
		//	printf("%d %d\n",(*nextLeaf).fileNumber,((*nextLeaf).startPos)/2);
			if((*nextLeaf).fileNumber==fileNumber && ((*nextLeaf).startPos)/2==startPos)
			{
				found=1;
				return ;
			}	
		}
	}

	
			
	for(i=0;i<2;i++)
	{
		int childIndex=(*parent).children[i];
		if(childIndex>0)
		{
			isAleaf=0;
			findLeaf(startPos, fileNumber,tree, childIndex, counter, occurences, 
					patternLength, currdepth+(*parent).length );
		}
	}
	
	if(isAleaf)
	{
		//printf("%d %d\n",(*parent).fileNumber,((*parent).startPos-currdepth)/2);
		if((*parent).fileNumber==fileNumber && ((*parent).startPos-currdepth)/2==startPos)
		{
			found=1;
			return ;
		}						
	}
	
}

int findAllOccurences(char *treeprefix, char *inputfilenameprefix, char *pattern,
		int patternLength, ResultRecord *occurences, int *count, char *pattern01, char *buffer)
{
	int i;
	
	
//	char dividersFileName[MAX_PATH_LENGTH];
	FILE *treeFile;
	char treeFileName[MAX_PATH_LENGTH];
	unsigned int totalNodes;
	STNode *tree;
	char inputFileName[MAX_PATH_LENGTH];
	FILE *inputFile;
	int binaryPatternLength=patternLength+patternLength;
	unsigned int bitPrefix;
	int treeFileNumber;
	int depth=0;
	int stindex;
	//1. Tree is 01 - convert pattern into 01 alphabet
	
	int fileNumber;
	int binaryStartPos;
	int depth1=depth;
	int startPos;
	

	
	
	
	get01Encoding(pattern,patternLength,pattern01);	
	//3. We need a binary prefix of length 32 - to locate dividers
	bitPrefix = getBitPrefix(pattern01,binaryPatternLength);
	
	//4. Locate 1-2 trees to be searched - for now suppose 1 tree
	treeFileNumber=locateTree(dividers,bitPrefix);	
	
	if(treeFileNumber==-1)
	{
		printf("Pattern not found (tree location)\n");
		exit(1);
	}
	
	//5. Open and read suffix tree file
	sprintf(treeFileName,"%s_%i",treeprefix,treeFileNumber);
	
	if(!(treeFile= fopen ( treeFileName , "rb" )))
	{
		printf("Could not open suffix tree file \"%s\" \n", treeFileName);
		return 1;
	}
	
	totalNodes=dividers[treeFileNumber].length;
	
	tree=(STNode*) calloc (totalNodes, sizeof(STNode));
	
	
	if(fread (tree,sizeof(STNode),totalNodes,treeFile)!=totalNodes)
	{
		printf("Error loading tree from file \"%s\" \n", treeFileName);
		return 1;
	}
	fclose(treeFile);
	
	
	//6. Perform blind search
	
	stindex=blindSearch(pattern01,binaryPatternLength,tree, &depth);
	if(stindex==-1)
	{
		printf("Pattern not found (blind search)\n");
		return 0;
	}
	
	//7. Find first leaf
	fileNumber=0;
	binaryStartPos=0;
	depth1=depth;
	findFirstLeaf(tree, stindex, &fileNumber, &binaryStartPos, &depth1,binaryPatternLength);
	
	//8. Verify that pattern match
	sprintf(inputFileName,"%s_%i",inputfilenameprefix,fileNumber);
	if(!(inputFile= fopen ( inputFileName , "rb" )))
	{
		printf("Could not open input DNA file \"%s\" \n", inputFileName);
		return 1;
	}
	
	startPos=binaryStartPos/2;
	fseek(inputFile, startPos, SEEK_CUR);
	
	if(fread(buffer,sizeof(char),patternLength,inputFile)!=(unsigned int)patternLength)
	{
		printf("Error reading input DNA file \"%s\" \n", inputFileName);
		return 1;
	}
	
	fclose(inputFile);
	for(i=0;i<(patternLength);i++)
	{
		if(pattern[i]!=buffer[i])
		{
			printf("Pattern not found after verification\n");
			return 0;
		}
	}
	
	//9. Find all occurences	
	collectAllLeaves(tree, stindex, count,occurences ,binaryPatternLength, depth);

	free(tree);
	return 0;	
}

int findOccurenceRetry(int start, int file, char *treeprefix, char *inputfilenameprefix, char *pattern,
		int patternLength, ResultRecord *occurences, int *count, char *pattern01, char *buffer, 
	unsigned int treeFileNumber )
{
	int i;
	
	
//	char dividersFileName[MAX_PATH_LENGTH];
	FILE *treeFile;
	char treeFileName[MAX_PATH_LENGTH];
	unsigned int totalNodes;
	STNode *tree;
	char inputFileName[MAX_PATH_LENGTH];
	FILE *inputFile;
	int binaryPatternLength=patternLength+patternLength;
	unsigned int bitPrefix;
	
	int depth=0;
	int stindex;
	//1. Tree is 01 - convert pattern into 01 alphabet
	
	int fileNumber;
	int binaryStartPos;
	int depth1=depth;
	int startPos;
	

	
	
	
	get01Encoding(pattern,patternLength,pattern01);	
	//3. We need a binary prefix of length 32 - to locate dividers
	bitPrefix = getBitPrefix(pattern01,binaryPatternLength);
	
	//4. Locate 1-2 trees to be searched - for now suppose 1 tree
	
	
		
	//5. Open and read suffix tree file
	sprintf(treeFileName,"%s_%i",treeprefix,treeFileNumber);
	
	if(!(treeFile= fopen ( treeFileName , "rb" )))
	{
		printf("Could not open suffix tree file \"%s\" \n", treeFileName);
		return 1;
	}
	
	totalNodes=dividers[treeFileNumber].length;
	
	tree=(STNode*) calloc (totalNodes, sizeof(STNode));
	
	
	if(fread (tree,sizeof(STNode),totalNodes,treeFile)!=totalNodes)
	{
		printf("Error loading tree from file \"%s\" \n", treeFileName);
		return 1;
	}
	fclose(treeFile);
	
	
	//6. Perform blind search
	
	stindex=blindSearch(pattern01,binaryPatternLength,tree, &depth);
	if(stindex==-1)
	{
		printf("Pattern not found (blind search)\n");
		return 0;
	}
	
	//7. Find first leaf
	fileNumber=0;
	binaryStartPos=0;
	depth1=depth;
	findFirstLeaf(tree, stindex, &fileNumber, &binaryStartPos, &depth1,binaryPatternLength);
	
	//8. Verify that pattern match
	sprintf(inputFileName,"%s_%i",inputfilenameprefix,fileNumber);
	if(!(inputFile= fopen ( inputFileName , "rb" )))
	{
		printf("Could not open input DNA file \"%s\" \n", inputFileName);
		return 1;
	}
	
	startPos=binaryStartPos/2;
	fseek(inputFile, startPos, SEEK_CUR);
	
	if(fread(buffer,sizeof(char),patternLength,inputFile)!=(unsigned int)patternLength)
	{
		printf("Error reading input DNA file \"%s\" \n", inputFileName);
		return 1;
	}
	
	fclose(inputFile);
	for(i=0;i<(patternLength);i++)
	{
		if(pattern[i]!=buffer[i])
		{
			printf("Pattern not found after verification\n");
			return 0;
		}
	}
	
	//9. Find specific occurence	
	findLeaf(start, file,tree, stindex, count,occurences ,binaryPatternLength, depth);
	
	free(tree);
	if(!found)
	{
		nextSubTree++;
		if(nextSubTree ==totalDividers)
			return 0;
		findOccurenceRetry(start, file, treeprefix, inputfilenameprefix, pattern,
		patternLength, occurences, count, pattern01, buffer, nextSubTree);

	}

	return 0;
}

int findOccurence(int start, int file, char *treeprefix, char *inputfilenameprefix, char *pattern,
		int patternLength, ResultRecord *occurences, int *count, char *pattern01, char *buffer)
{
	int i;
	
	
//	char dividersFileName[MAX_PATH_LENGTH];
	FILE *treeFile;
	char treeFileName[MAX_PATH_LENGTH];
	unsigned int totalNodes;
	STNode *tree;
	char inputFileName[MAX_PATH_LENGTH];
	FILE *inputFile;
	int binaryPatternLength=patternLength+patternLength;
	unsigned int bitPrefix;
	int treeFileNumber;
	int depth=0;
	int stindex;
	//1. Tree is 01 - convert pattern into 01 alphabet
	int depth1=depth;
	int fileNumber;
	int binaryStartPos;
	
	int startPos;
	

	
	
	
	get01Encoding(pattern,patternLength,pattern01);	
	//3. We need a binary prefix of length 32 - to locate dividers
	bitPrefix = getBitPrefix(pattern01,binaryPatternLength);
	
	//4. Locate 1-2 trees to be searched - for now suppose 1 tree
	treeFileNumber=locateTree(dividers,bitPrefix);	
	
	if(treeFileNumber==-1)
	{
		printf("Pattern not found (tree location)\n");
		exit(1);
	}
	
	//5. Open and read suffix tree file
	sprintf(treeFileName,"%s_%i",treeprefix,treeFileNumber);
	
	if(!(treeFile= fopen ( treeFileName , "rb" )))
	{
		printf("Could not open suffix tree file \"%s\" \n", treeFileName);
		return 1;
	}
	
	totalNodes=dividers[treeFileNumber].length;
	
	tree=(STNode*) calloc (totalNodes, sizeof(STNode));
	
	
	if(fread (tree,sizeof(STNode),totalNodes,treeFile)!=totalNodes)
	{
		printf("Error loading tree from file \"%s\" \n", treeFileName);
		return 1;
	}
	fclose(treeFile);
	
	
	//6. Perform blind search
	
	stindex=blindSearch(pattern01,binaryPatternLength,tree, &depth);
	if(stindex==-1)
	{
		printf("Pattern not found (blind search)\n");
		return 0;
	}
	
	//7. Find first leaf
	fileNumber=0;
	binaryStartPos=0;
	depth1=depth;
	findFirstLeaf(tree, stindex, &fileNumber, &binaryStartPos, &depth1,binaryPatternLength);
	
	//8. Verify that pattern match
	sprintf(inputFileName,"%s_%i",inputfilenameprefix,fileNumber);
	if(!(inputFile= fopen ( inputFileName , "rb" )))
	{
		printf("Could not open input DNA file \"%s\" \n", inputFileName);
		return 1;
	}
	
	startPos=binaryStartPos/2;
	fseek(inputFile, startPos, SEEK_CUR);
	
	if(fread(buffer,sizeof(char),patternLength,inputFile)!=(unsigned int)patternLength)
	{
		printf("Error reading input DNA file \"%s\" \n", inputFileName);
		return 1;
	}
	
	fclose(inputFile);
	for(i=0;i<(patternLength);i++)
	{
		if(pattern[i]!=buffer[i])
		{
			printf("Pattern not found after verification\n");
			return 0;
		}
	}
	
	//9. Find specific occurence	
	findLeaf(start, file,tree, stindex, count,occurences ,binaryPatternLength, depth);
	
	free(tree);
	if(!found)
	{
		nextSubTree=treeFileNumber+1;
		if(nextSubTree ==totalDividers)
			return 0;

		findOccurenceRetry(start, file, treeprefix, inputfilenameprefix, pattern,
		patternLength, occurences, count, pattern01, buffer, nextSubTree);

	}
	return 0;
}
